/*
File written by Emily Smith-Greenaway, Associate Professor of Sociology, USC (smithgre@usc.edu)
This file reproduces findings reported in: 
Smith-Greenaway, Emily and Abigail Weitzman. 2020. Sibling Mortality Burden in Low-Income Countries:
A Descriptive Analysis of Sibling Death in Africa, Asia, and Latin America and the Caribbean, 
PLOS One.  
This do-file makes use of data generated in "1_Smith-GreenawayWeitzman2020PLOSOne_replication_append_data.do" to generate 
the statistics reported in the study. 
The data required to run this do-file are publicly available at dhsprogram.com (user must register for data)
*/

* Load the sub-Saharan Africa file and stack the Asia and Latin America files
* underneath it to form one pooled data set.
use append_ssa.dta, clear
append using append_asia.dta
append using append_latinamerica.dta

* Row-number identifier (used further down as the subject id in stset);
* the report shows whether any value of id is repeated.
generate id = _n
duplicates report id

* Survey design declaration: weight is v005 divided by 1,000,000,
* v001 is the primary sampling unit and v022 (copied to strata) the stratum.
generate svyweight = v005/1000000
generate strata = v022
svyset v001 [pweight=svyweight], strata(strata)
 
* Restrict the sample to respondents younger than 35 whose derived birth year
* (v007 minus v012) is 1985 or later.
generate yearborn = v007 - v012
generate age = v012
keep if age < 35
* Ethiopian surveys are dated in the Ethiopian calendar rather than the Gregorian
* calendar, so the birth-year cut-off for Ethiopia is set 8 years earlier
* (keep birth years after 1976 instead of after 1984).
keep if (country_ != "Ethiopia" & yearborn > 1984) | (country_ == "Ethiopia" & yearborn > 1976)

* Indicators of a sibling death before the respondent's birth and during the
* respondent's lifetime. mm6 values of 97 or above (timing of death not known)
* and missing mm6 values never switch either indicator on.

* Strip the leading zero from mm6_01 ... mm6_09 so all 20 sibling slots share one naming pattern
forvalues s = 1/9 {
    rename mm6_0`s' mm6_`s'
}

generate beforebirth = 0
generate lifetime = 0
forvalues s = 1/20 {
    * mm6 greater than the respondent's age: the death came before the respondent was born
    replace beforebirth = 1 if mm6_`s' > age & mm6_`s' < 97
    * mm6 no greater than the respondent's age: the death fell within the respondent's lifetime
    replace lifetime = 1 if mm6_`s' <= age & mm6_`s' < 97
}

*Generating respondent's age when the first sibling died and indicator for ever having a sibling die
*yearsincedeath: copy of mm6 with the codes 97-99 (timing not known) set to missing
forv i=1/20 {
gen yearsincedeath`i'=mm6_`i' 
recode yearsincedeath`i' 97/99=.
}

*agesibdied: respondent's age at each sibling's death (current age minus years since the death)
*kept in its own loop so that agesibdied1-agesibdied20 stay adjacent for the rowmin() range below
forv i=1/20 {
gen agesibdied`i'=age-yearsincedeath`i'
}
*negative values are deaths that happened before the respondent was born; they are not lifetime deaths
forv i=1/20 {
replace agesibdied`i'=. if agesibdied`i'<0
}

egen firstsibdied=rowmin(agesibdied1-agesibdied20)
**removing the few cases whose first sibling death came at age 25 or later, given the focus on deaths before age 25
**the upper bound is "any non-missing value" rather than 33: respondents can be 34, so a first death at age 34
**must be excluded from lifetime in the same way it is excluded from firstsibdied on the next line
replace lifetime=. if firstsibdied>=25 & firstsibdied<.
**same exclusion applied to the age at first sibling death itself
replace firstsibdied=. if firstsibdied>=25
gen eversibdied=0 
replace eversibdied=1 if firstsibdied!=. 
replace eversibdied=1 if beforebirth==1
*including deaths reported in "ever death" indicator, even if respondent was not aware of when the sibling died
forv i=1/20 {
replace eversibdied = 1 if mm6_`i'>=97 & mm6_`i'<=99
}

* Sex of the first sibling to die.
* sibflag stores the slot number of the sibling whose agesibdied value equals
* firstsibdied; if several slots tie, the highest-numbered slot is the one kept.
* Respondents with no recorded age at first sibling death keep a missing sibflag.
generate sibflag = .
forvalues s = 1/20 {
    replace sibflag = `s' if firstsibdied == agesibdied`s' & firstsibdied != .
}

* Strip the leading zero from mm1_01 ... mm1_09
forvalues s = 1/9 {
    rename mm1_0`s' mm1_`s'
}

* Copy mm1 from the flagged slot
generate genderfirstsibdied = .
forvalues s = 1/20 {
    replace genderfirstsibdied = mm1_`s' if sibflag == `s'
}
* Values from 8 to 9 are set to missing (presumably "unknown" codes in mm1 - confirm against the codebook)
replace genderfirstsibdied = . if inrange(genderfirstsibdied, 8, 9)

*Generating indicators for birth order (firstborn, lastborn, middlechild)
forv i=1/9 {
rename mm4_0`i' mm4_`i'
}

**note converting century month codes (CMC) to calendar year
**CMC = 12 x (year-1900) + month, so the year is 1900 + int((CMC-1)/12)
**dividing the raw CMC by 12 would place every December birth in the following year
forv i=1/20 {
gen yearsibbirthALL`i'=1900+ int((mm4_`i'-1)/12)
}
*earliest sibling birth year compared with the respondent's own birth year:
*positive difference (all siblings born later) = firstborn, negative = not firstborn, a difference of 0 stays 0
egen yearsibbirthALL = rowmin(yearsibbirthALL1-yearsibbirthALL20)
gen firstborn=yearsibbirthALL-yearborn
recode firstborn 1/95=1 -99/-1=0
*birth order is left undefined when mmc1 is 0
replace firstborn=. if mmc1==0

*latest sibling birth year earlier than the respondent's own birth year = lastborn
egen yearsibbirthALL_ = rowmax(yearsibbirthALL1-yearsibbirthALL20)
gen lastborn=0
replace lastborn=1 if yearsibbirthALL_<yearborn

*neither firstborn nor lastborn = middle child
gen middlechild=0 
replace middlechild=1 if lastborn==0 & firstborn==0

*Generating indicator for relative age of respondent to the deceased 
**note converting century month codes to calendar year with the same formula as above
forv i=1/20 {
gen yearsibbirth`i'=1900+ int((mm4_`i'-1)/12)
}

*keep the birth year only for the sibling slot flagged in sibflag (first sibling to die)
forv i=1/20 {
replace yearsibbirth`i'=. if sibflag!=`i'
}

*only one slot is non-missing per respondent, so rowmax simply picks that value
egen yearsibbirth = rowmax(yearsibbirth1-yearsibbirth20) 
gen diff=yearsibbirth-yearborn
*older=1 when the deceased sibling was born before the respondent, 0 when born in the same year or later
gen older=diff
recode older 1/95=0 -100/-1=1

* Age of the first-deceased sibling at the time of death:
* hypoage is the age that sibling would have reached by year v007 (v007 minus the
* sibling's birth year); subtracting the years since the death gives the age at death.
generate hypoage = v007 - yearsibbirth
forvalues s = 1/20 {
    generate ageatdeath`s' = hypoage - yearsincedeath`s' if sibflag == `s'
}
* Only the flagged slot is non-missing, so rowmax returns that single value
egen ageatdeath = rowmax(ageatdeath1-ageatdeath20)
* Respondents with a negative computed age at death are removed from the data;
* respondents with a missing ageatdeath are kept (missing counts as >= 0 in Stata).
keep if ageatdeath >= 0

* Peru only: age at death is taken from mm7 for the flagged sibling slot instead
* of being derived from birth year and years since death.
* Strip the leading zero from mm7_01 ... mm7_09
forvalues s = 1/9 {
    rename mm7_0`s' mm7_`s'
}
* NOTE(review): this loop stops at slot 19 while the other sibling loops run to 20,
* and it refers to country where the rest of the file uses country_ (this resolves
* through variable abbreviation unless a separate variable named country exists) - confirm both.
forvalues s = 1/19 {
    generate ageatdeath`s'_peru = mm7_`s' if sibflag == `s' & country == "Peru"
}
egen ageatdeath_peru = rowmax(ageatdeath1_peru-ageatdeath19_peru)

* Number of siblings lost and number remaining.
* totalsib is mmc1. totallost counts the non-missing mm6 slots for respondents
* with eversibdied equal to 1 and is 0 for everyone else.
generate totalsib = mmc1
egen totallost = rownonmiss(mm6_1-mm6_20) if eversibdied == 1
replace totallost = 0 if totallost == .
generate remainingsiball = totalsib - totallost

* Order the data by region group and country for the by-group tabulations that follow
sort group country_

*****************
*****************
/*
Figure 1. Percentage of young women (15-34) who have at least one
deceased sibling before age 25 in 43 countries across Western, Central,
Eastern, and Southern Africa, South and Southeast Asia, and Latin America and the Caribbean
*map created in mapchart.net*
Weighted tabulation of eversibdied within each group and country.
*/
bysort group country_: tabulate eversibdied [aweight=v005/1000000]

*****************
*****************
/*
Figure 3. Average age at time of first sibling death, by country
Weighted summary of firstsibdied among respondents with eversibdied equal to 1.
*/
bysort country_: summarize firstsibdied if eversibdied==1 [aweight=v005/1000000]

*****************
*****************
/*
Figure 4. Age and sex distribution of deceased siblings, by region
*With these tabulations, I created a population pyramid in excel. E-mail for file: smithgre@usc.edu
*/
* Age at death split by sex of the first deceased sibling (genderfirstsibdied 1 -> brothersage, 2 -> sistersage)
generate brothersage = ageatdeath if genderfirstsibdied == 1
generate sistersage = ageatdeath if genderfirstsibdied == 2

* Collapse age at death into ten bands: 0-1, 2-4, 5-9, then five-year bands up to 35-39, and 40-100
foreach agevar of varlist brothersage sistersage {
    recode `agevar' 0/1=1 2/4=2 5/9=3 10/14=4 15/19=5 20/24=6 25/29=7 30/34=8 35/39=9 40/100=10
}

* Weighted distribution of the bands for each of the six region groups, brothers first and then sisters
forvalues g = 1/6 {
    foreach agevar of varlist brothersage sistersage {
        tabulate `agevar' if firstsibdied != . & group == `g' [aweight=v005/1000000]
    }
}

*****************
*****************
/*
Figure 5. Estimates of the percentage of bereaved siblings who have lost 1, 2, 3, and 4 or more siblings
Weighted tabulation of totallost by country among respondents with eversibdied equal to 1.
*/
bysort country_: tabulate totallost if eversibdied==1 [aweight=v005/1000000]

*****************
*****************
/*
Figure 6. Remaining number of siblings, by history of sibling loss and country
Weighted mean and confidence interval of remainingsiball within each country, computed
separately for respondents who lost 0, 1, 2, 3, and 4 or more siblings.
*/
* No sibling lost
bysort country_: ci mean remainingsiball [aweight=v005/1000000] if totallost==0
* Exactly 1, 2 or 3 siblings lost
forvalues k = 1/3 {
    by country_: ci mean remainingsiball [aweight=v005/1000000] if eversibdied==1 & totallost==`k'
}
* 4 or more siblings lost (upper bound 20 = number of sibling slots)
by country_: ci mean remainingsiball [aweight=v005/1000000] if eversibdied==1 & totallost>=4 & totallost<=20


*****************
*****************
/*
SI Table 1. List of countries, surveys, and corresponding samples and estimates
Weighted tabulations within each group and country of, in order:
  eversibdied - total % who ever had at least one sibling die
  beforebirth - % born to a bereaved mother
  lifetime    - % who experienced a sibling death during their lifetime
*/
sort group country_
foreach outcome of varlist eversibdied beforebirth lifetime {
    by group country_: tabulate `outcome' [aweight=v005/1000000]
}

*****************
*****************
/*
SI Table 2. Additional characteristics of deceased siblings
All estimates are weighted and produced within each group and country.
*/
*% Deceased sibling female
*bysort (rather than by) so that this section does not depend on an earlier sort still being in place
bysort group country_: tab genderfirstsibdied [aweight=v005/1000000]

*Respondent's age at time of first sibling death, among those with a recorded age
*variable name written out in full (eversibdied) so the command does not rely on variable abbreviation
by group country_: sum firstsibdied [aweight=v005/1000000] if eversibdied==1 & firstsibdied!=.

*% Deceased sibling older than focal respondent 
by group country_: tab older  [aweight=v005/1000000]

*****************
*****************
/*
SI Figure 2. Percent of respondents born to a bereaved mother (left panel) and
percent of respondents who ever had a sibling die (right panel)
among young women in 43 countries, by geographic region.
The tabulations below are weighted, run within each region group, and split by the
respondent's birth-order position.
*/
sort group

* Left panel: sibling death before the respondent's birth, for middle and last-born respondents
foreach position of varlist middlechild lastborn {
    by group: tabulate beforebirth if `position'==1 [aweight=v005/1000000]
}

* Right panel: sibling death during the respondent's lifetime, for first-born, middle and last-born respondents
foreach position of varlist firstborn middlechild lastborn {
    by group: tabulate lifetime if `position'==1 [aweight=v005/1000000]
}

*****************
*****************
/* 
Figure 2. Cumulative probability of experiencing a sibling die between ages 0 and 25 among young women in 43 countries, by
geographic region
*/

*Observation time: respondent's age at the first sibling death when one is recorded, otherwise v007 minus yearborn
gen obperiod=v007-yearborn 
replace obperiod=firstsibdied if firstsibdied!=.
*Failure flag: 1 when an age at first sibling death is recorded, 0 otherwise.
*It is taken directly from the missing status of firstsibdied; zero-filling firstsibdied and then testing for a
*nonzero value would flag a first death at age 0 as no death and would overwrite firstsibdied itself.
gen failed=(firstsibdied!=.)
*NOTE(review): records with obperiod equal to 0 are expected to be left out by stset (exit time not after entry),
*so first deaths at age 0 may still be absent from the curve - confirm against the stset summary output.
stset obperiod, failure(failed) id(id)
*Split each respondent's record into one-year episodes
stsplit time, every(1)
*Drop episodes that end at age 25 or later so follow-up stops before age 25
drop if obperiod>=25
*Episodes with a missing failure value are set to 0 (no event in that episode)
recode failed .=0
stset obperiod, failure(failed) id(id)

*Cumulative failure curves with confidence intervals, one per region group
sts graph, failure ci by(group) 

*****************
*****************
/*
SI Figure 1. Cumulative probability of experiencing a sibling die between ages 0 and 25 among young women in 43
countries, by geographic region and country
Uses the survival-time settings declared by the preceding stset commands.
*/
* Run separately for each value of the "group" variable
sts graph, by(country_) failure ci
